# PROGRAM TO ESTIMATE HAZARD FUNCTIONS FOR INPUTS TO MARKOV MODEL

library(dplyr)
library(heemod)
library(survival)
library(muhaz)


# Set the directory of this script as the working directory so the relative
# file names used below resolve. The lookup goes through the RStudio API, so it
# is only attempted inside a running RStudio session with a saved document;
# otherwise the current working directory is kept (start R from the script's
# folder in that case).
local({
  in_rstudio <- requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()
  script_path <- if (in_rstudio) {
    rstudioapi::getActiveDocumentContext()$path
  } else {
    ""
  }
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  } else {
    message("Script location unavailable; keeping working directory: ",
            getwd())
  }
})

# Synthetically recreate the individual patient data (IPD) from the K-M curves
# using the Guyot algorithms in file "FUNCTION_GUYOT_1.R".
# The input data are in the CSV files "PFS_PEMBRO_INTV.csv" and
# "PFS_PEMBRO_CONT.csv".
# The user needs to manually fill in the numbers at risk in the R file
# "DIGITALIZACION PEMBRO.R".
#
# NOTE: the user first needs to manually create an empty folder called
# "RESULTADOS" in the working directory.
# NOTE(review): ggplot2 and a package providing write.xlsx() are used further
# down but are not attached by this file; presumably the sourced script
# attaches them - confirm.
source("DIGITALIZACION PEMBRO.R")

# Names of the available models.
# NOTE(review): assumes the sourced script creates PFS_fco2_mod - confirm.
PFS_fco2_mod$NOMBRE_MODELO

# Criteria to choose the best-fitting function:
# 1. visual inspection of observed vs estimated survival
# 2. visual inspection of observed vs estimated hazards
# 3. lowest AIC

# Follow-up time of the clinical study; also the end of the default time grid
# of choose_flexsurv() and of the hazard predictions below.
horizon <- 48
# Plot the observed (Kaplan-Meier) and the estimated survival functions for
# fitted distribution number `x`, for both treatment arms.
#
# The function reads two objects from the calling environment chain:
#   <outcome>_fco2_mod  control arm: uses $modelos (named list of fitted
#                       models), $KAPLAN and $AIC$AIC
#   <outcome>_fco1_mod  intervention arm: uses $KAPLAN
#
# Arguments:
#   outcome     Prefix of the objects described above, e.g. "PFS" or "OS".
#               Only the first element is used, so the default resolves to
#               "PFS".
#   x           Position of the distribution within $modelos.
#   hr          Hazard ratio applied to the control-arm model (via apply_hr)
#               to obtain the intervention-arm curve. Defaults to the global
#               `rr`.
#   time        Times at which the survival curves are evaluated. Defaults to
#               0 up to the global `horizon`.
#   t_validate  Time whose survival probabilities are printed on the plot;
#               it must be one of the values in `time`.
#
# Returns a list with the ggplot object (`p`), the intervention-arm survival
# model (`surv1`) and the control-arm survival model (`surv2`).
choose_flexsurv <- function(
    outcome = c("PFS", "OS"),
    x = 1,
    hr = rr,
    time = c(0:horizon),
    t_validate = 24
) {
  # A single outcome is plotted per call; without this the default length-2
  # vector would be pasted into two object names at once.
  outcome <- as.character(outcome)[[1L]]

  control_fit <- get(paste0(outcome, "_fco2_mod"))       # control
  intervention_fit <- get(paste0(outcome, "_fco1_mod"))  # intervention

  # Names of the fitted distributions and the AIC of the selected one.
  dist <- names(control_fit$modelos)
  aic_model <- control_fit$AIC$AIC[x]

  # Selected by position, which also works for non-syntactic model names.
  pfs2 <- control_fit$modelos[[x]]
  pfs1 <- apply_hr(pfs2, hr)  # intervention

  km_p_1 <- intervention_fit$KAPLAN
  km_p_2 <- control_fit$KAPLAN

  plot.data1 <- data.frame(
    Time = time,
    PFS_1 = compute_surv(pfs1, time, 1, "survival"),
    PFS_2 = compute_surv(pfs2, time, 1, "survival"),
    KM_1 = compute_surv(km_p_1, time, 1, "survival"),
    KM_2 = compute_surv(km_p_2, time, 1, "survival")
  )

  # Look the validation row up by its time value: the default grid starts at
  # 0, so row number t_validate would hold time t_validate - 1.
  validate_row <- match(t_validate, plot.data1$Time)
  if (is.na(validate_row)) {
    warning("t_validate = ", t_validate, " is not one of the values in ",
            "`time`; the reported probabilities will be NA.", call. = FALSE)
  }
  surv_at <- function(column) {
    round(plot.data1[validate_row, column], 2)
  }

  # Horizontal position of the text block (second text column at place + 16).
  place <- 70

  # Red = treatment 1 (intervention), blue = treatment 2 (control);
  # solid = fitted model, dashed step = Kaplan-Meier.
  p <- ggplot(data = plot.data1, mapping = aes(x = Time, y = PFS_1)) +
    ylim(0, 1) +
    geom_line(aes(y = PFS_1), colour = "red") +
    geom_step(aes(y = KM_1), colour = "red", linetype = 2) +
    geom_line(aes(y = PFS_2), colour = "blue") +
    geom_step(aes(y = KM_2), colour = "blue", linetype = 2)

  p <- p +
    annotate("text", x = place, y = 0.9, size = 3,
             label = paste("Survival probabilities at month", t_validate,
                           sep = " ")) +
    annotate("text", x = place, y = 0.8, size = 3,
             label = paste(dist[x], sep = " ")) +
    annotate("text", x = place, y = 0.7, size = 3,
             label = paste("Treatment 1", surv_at("PFS_1"), sep = " ")) +
    annotate("text", x = place, y = 0.6, size = 3,
             label = paste("Treatment 2", surv_at("PFS_2"), sep = " ")) +
    annotate("text", x = place + 16, y = 0.5, size = 3,
             label = paste("HR", hr, sep = " ")) +
    annotate("text", x = place, y = 0.5, size = 3,
             label = paste("AIC", aic_model, sep = " ")) +
    annotate("text", x = place + 16, y = 0.8, size = 3,
             label = paste("Kaplan-Meier", sep = " ")) +
    annotate("text", x = place + 16, y = 0.7, size = 3,
             label = surv_at("KM_1")) +
    annotate("text", x = place + 16, y = 0.6, size = 3,
             label = surv_at("KM_2"))

  p <- p +
    ggtitle(outcome) +
    labs(x = "Time, months", y = "Survival probability")

  list(p = p, surv1 = pfs1, surv2 = pfs2)
}
# Load the saved objects from "modelos_PEMBRO.RData".
load(file = "modelos_PEMBRO.RData")

# Generate a graph of observed and estimated PFS for function dist_t.
rr <- 0.63  # (0,48; 0,82)  NOTE(review): presumably the interval of the HR
dist_t <- 8  # corresponds to generalized f
choose_flexsurv("PFS", dist_t, rr, t_validate = 24)


# Generate a graph of the K-M hazards and the estimated hazards.
# One table of predicted hazards per fitted model, labelled with the model
# name in column `m`, stacked into a single long data frame.
# NOTE(review): the first 8 models are used, as before; confirm that
# PFS_fco2_mod$modelos holds exactly 8 fitted models.
model.pred <- do.call(
  rbind,
  lapply(seq_len(8), function(i) {
    cbind(
      predict(PFS_fco2_mod$modelos[[i]], type = "hazard",
              times = seq_len(horizon))[[1]][[1]],
      m = PFS_fco2_mod$NOMBRE_MODELO[i]
    )
  })
)
model.pred <- as.data.frame(model.pred)
colnames(model.pred) <- c(".time", ".pred_hazard", "m")

# Hazard estimates from the IPD (muhaz::kphaz.fit), stored with the same
# column names as the model predictions so both can share one plot.
kp <- kphaz.fit(PFS_fco2_mod$IPD$time, PFS_fco2_mod$IPD$status)
km <- data.frame(.time = kp$time, .pred_hazard = kp$haz, m = "K-M")

# Restrict the plot to the study follow-up (same bound as the prediction
# grid, instead of a second hard-coded 48).
plotdata <- model.pred[model.pred$.time <= horizon, ]

# Lines: model-based hazards; smoothed curve: the K-M based hazard estimates.
ggplot(data = plotdata, aes(x = .time, y = .pred_hazard, color = m)) +
  geom_line() +
  geom_smooth(data = km)

# One row per time point and one hazard column per model.
model.pred.wide <- reshape(model.pred, v.names = ".pred_hazard",
                           idvar = ".time", timevar = "m",
                           direction = "wide")


write.xlsx(model.pred.wide, file = "hazard_pfs.xlsx")

# AIC of all the models.
PFS_fco2_mod$AIC